# Scrape article metadata from the Journal of Software (软件学报) website
import requests
from bs4 import BeautifulSoup
from dbUtils import DBTool
def scrawler(url, selector, flag, timeout=10):
    """Download *url* and extract content matched by a CSS *selector*.

    Args:
        url: Page to fetch.
        selector: CSS selector passed to BeautifulSoup ``select()``.
        flag: 0 -> return the text of each matched element;
              1 -> return each matched element's ``href`` attribute.
        timeout: Seconds to wait for the HTTP response (default 10).
            Without a timeout, ``requests.get`` can block forever.

    Returns:
        A list of extracted strings; an empty list when nothing matches
        or when *flag* is neither 0 nor 1 (the original implicitly
        returned ``None`` for unknown flags).

    Raises:
        requests.HTTPError: if the server responds with a 4xx/5xx status,
            so an error page is never scraped as if it were an article.
    """
    # A realistic browser User-Agent; some journal sites reject the
    # default python-requests UA.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0'}
    r = requests.get(url, headers=headers, timeout=timeout)
    r.raise_for_status()  # fail loudly instead of parsing an error page
    bs = BeautifulSoup(r.text, 'html.parser')
    elements = bs.select(selector=selector)
    if flag == 0:
        return [item.text for item in elements]
    if flag == 1:
        return [item.attrs['href'] for item in elements]
    return []  # unknown flag: explicit empty result instead of implicit None

if __name__ == '__main__':
    # Scrape one Journal of Software article page and persist the
    # extracted fields through DBTool.
    article_url = "https://www.ijsi.org/ijsi/article/abstract/326"
    db = DBTool()

    # Field name -> (CSS selector, extraction mode); mode 0 extracts
    # element text, mode 1 extracts href attributes. Each field issues
    # its own request because scrawler() fetches per call.
    fields = {
        'title': (".p2 div.title", 0),
        'author': ("#EnAuthorList > li > a", 0),
        'abstract': ("#EnAbstractValue", 0),
        'pdfurl': ("#PdfUrl", 1),
        'pubdate': ("#all_issue_position > a:nth-child(2)", 0),
    }
    scraped = {name: scrawler(article_url, sel, flag=mode)
               for name, (sel, mode) in fields.items()}

    # Insert the record first; dump the whole table only on success.
    if db.insert(scraped['title'], scraped['author'], scraped['abstract'],
                 scraped['pdfurl'], scraped['pubdate']):
        print(db.queryAll())

